home *** CD-ROM | disk | FTP | other *** search
- /* Copyright (c) 1991 Sun Wu and Udi Manber. All Rights Reserved. */
- #include "agrep.h"
- #include "checkfile.h"
-
- unsigned Mask[MAXSYM];
- unsigned Init1, NO_ERR_MASK, Init[MaxError];
- unsigned Bit[WORD+1];
- CHAR buffer[BlockSize+Maxline+1];
- unsigned Next[MaxNext], Next1[MaxNext];
- unsigned wildmask, endposition, D_endpos;
- int REGEX, RE_ERR, FNAME, WHOLELINE, SIMPLEPATTERN;
- int COUNT, HEAD, TAIL, LINENUM, INVERSE, I, S, DD, AND, SGREP, JUMP;
- int Num_Pat, PSIZE, num_of_matched, SILENT, NOPROMPT, BESTMATCH, NOUPPER;
- int NOMATCH, TRUNCATE, FIRST_IN_RE, FIRSTOUTPUT;
- int WORDBOUND, DELIMITER, D_length;
- int EATFIRST, OUTTAIL;
- int FILEOUT;
- int DNA = 0;
- int APPROX = 0;
- int PAT_FILE = 0;
- int CONSTANT = 0;
- int total_line = 0; /* used in mgrep */
-
- CHAR **Textfiles; /* array of filenames to be searched */
- CHAR old_D_pat[MaxDelimit] = "\n"; /* to hold original D_pattern */
- CHAR CurrentFileName[MAXNAME];
- CHAR Progname[MAXNAME];
- CHAR D_pattern[MaxDelimit] = "\n; "; /* string which delimits records --
- defaults to newline */
- int NOFILENAME = 0, /* Boolean flag, set for -h option */
- FILENAMEONLY = 0,/* Boolean flag, set for -l option */
- Numfiles = 0; /* indicates how many files in Textfiles */
- extern int init();
- int table[WORD][WORD];
-
- initial_value()
- {
- int i;
-
- JUMP = REGEX = FNAME = BESTMATCH = NOPROMPT = NOUPPER = 0;
- COUNT = LINENUM = WHOLELINE = SGREP = 0;
- EATFIRST = INVERSE = AND = TRUNCATE = OUTTAIL = 0;
- FIRST_IN_RE = NOMATCH = FIRSTOUTPUT = ON;
- I = DD = S = 1;
- HEAD = TAIL = ON;
- D_length = 2;
- SILENT = Num_Pat = PSIZE = SIMPLEPATTERN = num_of_matched = 0 ;
- WORDBOUND = DELIMITER = RE_ERR = 0;
- Bit[WORD] = 1;
- for (i = WORD - 1; i > 0 ; i--) Bit[i] = Bit[i+1] << 1;
- for (i=0; i< MAXSYM; i++) Mask[i] = 0;
- }
-
- compute_next(M, Next, Next1)
- int M; unsigned *Next, *Next1;
- {
- int i, j=0, n, k, temp;
- int mid, pp;
- int MM, base;
- unsigned V[WORD];
-
- base = WORD - M;
- temp = Bit[base]; Bit[base] = 0;
- for (i=0; i<WORD; i++) V[i] = 0;
- for (i=1; i<M; i++)
- {
- j=0;
- while (table[i][j] > 0 && j < 10) {
- V[i] = V[i] | Bit[base + table[i][j++]];
- }
- }
- Bit[base]=temp;
- if(M <= SHORTREG)
- {
- k = exponen(M);
- pp = 2*k;
- for(i=k; i<pp ; i++)
- { n = i;
- Next[i]= (k>>1);
- for(j=M; j>=1; j--)
- {
- if(n & Bit[WORD]) Next[i] = Next[i] | V[j];
- n = (n>>1);
- }
- }
- return;
- }
- if(M > MAXREG) fprintf(stderr, "%s: regular expression too long\n", Progname);
- MM = M;
- if(M & 1) M=M+1;
- k = exponen(M/2);
- pp = 2*k;
- mid = MM/2;
- for(i=k; i<pp ; i++)
- { n = i;
- Next[i]= (Bit[base]>>1);
- for(j=MM; j>mid ; j--)
- {
- if(n & Bit[WORD]) Next[i] = Next[i] | V[j-mid];
- n = (n>>1);
- }
- n=i-k;
- Next1[i-k] = 0;
- for(j = 0; j<mid; j++)
- {
- if(n & Bit[WORD]) Next1[i-k] = Next1[i-k] | V[MM-j];
- n = (n>>1);
- }
- }
- return;
- }
-
- exponen(m)
- int m;
- { int i, ex;
- ex= 1;
- for (i=0; i<m; i++) ex= ex*2;
- return(ex);
- }
-
- re1(Text, M, D)
- int Text, M, D;
- {
- register unsigned i, c, r0, r1, r2, r3, CMask, Newline, Init0, r_NO_ERR;
- register unsigned end;
- register unsigned hh, LL=0, k; /* Lower part */
- int FIRST_TIME=ON, num_read , j=0, base;
- unsigned A[MaxRerror+1], B[MaxRerror+1];
- unsigned Next[MaxNext], Next1[MaxNext];
- CHAR buffer[BlockSize+Maxline+1];
- int FIRST_LOOP = 1;
-
- r_NO_ERR = NO_ERR_MASK;
- if(M > 30) {
- fprintf(stderr, "%s: regular expression too long\n", Progname);
- exit(2);
- }
- base = WORD - M;
- hh = M/2;
- for(i=WORD, j=0; j < hh ; i--, j++) LL = LL | Bit[i];
- if(FIRST_IN_RE) compute_next(M, Next, Next1);
- /*SUN: try: change to memory allocation */
- FIRST_IN_RE = 0;
- Newline = '\n';
- Init[0] = Bit[base];
- if(HEAD) Init[0] = Init[0] | Bit[base+1];
- for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]>>hh] | Next1[Init[i-1]&LL];
- Init1 = Init[0] | 1;
- Init0 = Init[0];
- r2 = r3 = Init[0];
- for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; }
- if ( D == 0 )
- {
- while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
- {
- i=Maxline; end = num_read + Maxline;
- if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
- if(FIRST_LOOP) { /* if first time in the loop add a newline */
- buffer[i-1] = '\n'; /* in front the text. */
- i--;
- FIRST_LOOP = 0;
- }
- while ( i < end )
- {
- c = buffer[i++];
- CMask = Mask[c];
- if(c != Newline)
- { if(CMask != 0) {
- r1 = Init1 & r3;
- r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;
- }
- else {
- r2 = r3 & Init1;
- }
- }
- else { j++;
- r1 = Init1 & r3; /* match against endofline */
- r2 = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | r1;
- if(TAIL) r2 = (Next[r2>>hh] | Next1[r2&LL]) | r2; /* epsilon move */
- if(( r2 & 1 ) ^ INVERSE) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i-1, end, j);
- }
- r3 = Init0;
- r2 = (Next[r3>>hh] | Next1[r3&LL]) & CMask | Init0;
- /* match begin of line */
- }
- c = buffer[i++];
- CMask = Mask[c];
- if(c != Newline)
- { if(CMask != 0) {
- r1 = Init1 & r2;
- r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
- }
- else r3 = r2 & Init1;
- } /* if(NOT Newline) */
- else { j++;
- r1 = Init1 & r2; /* match against endofline */
- r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
- if(TAIL) r3 = ( Next[r3>>hh] | Next1[r3&LL] ) | r3;
- /* epsilon move */
- if(( r3 & 1 ) ^ INVERSE) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i-1, end, j);
- }
- r2 = Init0;
- r3 = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | Init0;
- /* match begin of line */
- }
- } /* while i < end ... */
- strncpy(buffer, buffer+num_read, Maxline);
- } /* end while read()... */
- return;
- } /* end if (D == 0) */
- while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
- {
- i=Maxline; end = Maxline + num_read;
- if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
- if(FIRST_TIME) { /* if first time in the loop add a newline */
- buffer[i-1] = '\n'; /* in front the text. */
- i--;
- FIRST_TIME = 0;
- }
- while (i < end )
- {
- c = buffer[i];
- CMask = Mask[c];
- if(c != Newline)
- {
- if(CMask != 0) {
- r2 = B[0];
- r1 = Init1 & r2;
- A[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
- r3 = B[1];
- r1 = Init1 & r3;
- r0 = r2 | A[0]; /* A[0] | B[0] */
- A[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | (( r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 1) goto Nextchar;
- r2 = B[2];
- r1 = Init1 & r2;
- r0 = r3 | A[1];
- A[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 2) goto Nextchar;
- r3 = B[3];
- r1 = Init1 & r3;
- r0 = r2 | A[2];
- A[3] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 3) goto Nextchar;
- r2 = B[4];
- r1 = Init1 & r2;
- r0 = r3 | A[3];
- A[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 4) goto Nextchar;
- } /* if(CMask) */
- else {
- r2 = B[0];
- A[0] = r2 & Init1;
- r3 = B[1];
- r1 = Init1 & r3;
- r0 = r2 | A[0];
- A[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 1) goto Nextchar;
- r2 = B[2];
- r1 = Init1 & r2;
- r0 = r3 | A[1];
- A[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 2) goto Nextchar;
- r3 = B[3];
- r1 = Init1 & r3;
- r0 = r2 | A[2];
- A[3] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 3) goto Nextchar;
- r2 = B[4];
- r1 = Init1 & r2;
- r0 = r3 | A[3];
- A[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 4) goto Nextchar;
- }
- }
- else { j++;
- r1 = Init1 & B[D]; /* match against endofline */
- A[D] = ((Next[B[D]>>hh] | Next1[B[D]&LL]) & CMask) | r1;
- if(TAIL) A[D] = ( Next[A[D]>>hh] | Next1[A[D]&LL] ) | A[D];
- /* epsilon move */
- if(( A[D] & 1 ) ^ INVERSE) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i, end, j);
- }
- for(k=0; k<=D; k++) B[k] = Init[0];
- r1 = Init1 & B[0];
- A[0] = (( Next[B[0]>>hh] | Next1[B[0]&LL]) & CMask) | r1;
- for(k=1; k<=D; k++) {
- r3 = B[k];
- r1 = Init1 & r3;
- r2 = A[k-1] | B[k-1];
- A[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((B[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1;
- }
- }
- Nextchar: i=i+1;
- c = buffer[i];
- CMask = Mask[c];
- if(c != Newline)
- {
- if(CMask != 0) {
- r2 = A[0];
- r1 = Init1 & r2;
- B[0] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | r1;
- r3 = A[1];
- r1 = Init1 & r3;
- r0 = B[0] | r2;
- B[1] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL]) & r_NO_ERR) | r1 ;
- if(D == 1) goto Nextchar1;
- r2 = A[2];
- r1 = Init1 & r2;
- r0 = B[1] | r3;
- B[2] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 2) goto Nextchar1;
- r3 = A[3];
- r1 = Init1 & r3;
- r0 = B[2] | r2;
- B[3] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 3) goto Nextchar1;
- r2 = A[4];
- r1 = Init1 & r2;
- r0 = B[3] | r3;
- B[4] = ((Next[r2>>hh] | Next1[r2&LL]) & CMask) | ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 4) goto Nextchar1;
- } /* if(CMask) */
- else {
- r2 = A[0];
- B[0] = r2 & Init1;
- r3 = A[1];
- r1 = Init1 & r3;
- r0 = B[0] | r2;
- B[1] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 1) goto Nextchar1;
- r2 = A[2];
- r1 = Init1 & r2;
- r0 = B[1] | r3;
- B[2] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 2) goto Nextchar1;
- r3 = A[3];
- r1 = Init1 & r3;
- r0 = B[2] | r2;
- B[3] = ((r2 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 3) goto Nextchar1;
- r2 = A[4];
- r1 = Init1 & r2;
- r0 = B[3] | r3;
- B[4] = ((r3 | Next[r0>>hh] | Next1[r0&LL])&r_NO_ERR) | r1 ;
- if(D == 4) goto Nextchar1;
- }
- } /* if(NOT Newline) */
- else { j++;
- r1 = Init1 & A[D]; /* match against endofline */
- B[D] = ((Next[A[D]>>hh] | Next1[A[D]&LL]) & CMask) | r1;
- if(TAIL) B[D] = ( Next[B[D]>>hh] | Next1[B[D]&LL] ) | B[D];
- /* epsilon move */
- if(( B[D] & 1 ) ^ INVERSE) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i, end, j);
- }
- for(k=0; k<=D; k++) A[k] = Init0;
- r1 = Init1 & A[0];
- B[0] = ((Next[A[0]>>hh] | Next1[A[0]&LL]) & CMask) | r1;
- for(k=1; k<=D; k++) {
- r3 = A[k];
- r1 = Init1 & r3;
- r2 = A[k-1] | B[k-1];
- B[k] = ((Next[r3>>hh] | Next1[r3&LL]) & CMask) | ((A[k-1] | Next[r2>>hh] | Next1[r2&LL]) & r_NO_ERR) | r1;
- }
- }
- Nextchar1: i=i+1;
- } /* while */
- strncpy(buffer, buffer+num_read, Maxline);
- } /* while */
- return;
- } /* re1 */
-
- re(Text, M, D)
- int Text, M, D;
- {
- register unsigned i, c, r1, r2, r3, CMask, k, Newline, Init0, Init1, end;
- register unsigned r_even, r_odd, r_NO_ERR ;
- unsigned RMask[MAXSYM];
- unsigned A[MaxRerror+1], B[MaxRerror+1];
- int num_read, j=0, bp, lasti, base, ResidueSize;
- int FIRST_TIME; /* Flag */
- base = WORD - M;
- k = 2*exponen(M);
- if(FIRST_IN_RE) {
- compute_next(M, Next, Next1);
- FIRST_IN_RE = 0; }
- for(i=0; i< MAXSYM; i++) RMask[i] = Mask[i];
- r_NO_ERR = NO_ERR_MASK;
- Newline = '\n';
- lasti = Maxline;
- Init0 = Init[0] = Bit[base];
- if(HEAD) Init0 = Init[0] = Init0 | Bit[base+1] ;
- for(i=1; i<= D; i++) Init[i] = Init[i-1] | Next[Init[i-1]]; /* can be out? */
- Init1 = Init0 | 1;
- r2 = r3 = Init0;
- for(k=0; k<= D; k++) { A[k] = B[k] = Init[0]; } /* can be out? */
- FIRST_TIME = ON;
- if ( D == 0 )
- {
- while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
- {
- i=Maxline; end = Maxline + num_read ;
- if((num_read < BlockSize)&&buffer[end-1] != '\n') buffer[end] = '\n';
- if(FIRST_TIME) {
- buffer[i-1] = '\n';
- i--;
- FIRST_TIME = 0;
- }
- while (i < end)
- {
- c = buffer[i++];
- CMask = RMask[c];
- if(c != Newline)
- {
- r1 = Init1 & r3;
- r2 = (Next[r3] & CMask) | r1;
- }
- else {
- r1 = Init1 & r3; /* match against '\n' */
- r2 = Next[r3] & CMask | r1;
- j++;
- if(TAIL) r2 = Next[r2] | r2 ; /* epsilon move */
- if(( r2 & 1) ^ INVERSE) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i-1, end, j);
- }
- lasti = i - 1;
- r3 = Init0;
- r2 = (Next[r3] & CMask) | Init0;
- }
- c = buffer[i++];
- CMask = RMask[c];
- if(c != Newline)
- {
- r1 = Init1 & r2;
- r3 = (Next[r2] & CMask) | r1;
- }
- else { j++;
- r1 = Init1 & r2; /* match against endofline */
- r3 = Next[r2] & CMask | r1;
- if(TAIL) r3 = Next[r3] | r3;
- if(( r3 & 1) ^ INVERSE) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i-1, end, j);
- }
- lasti = i - 1;
- r2 = Init0;
- r3 = (Next[r2] & CMask) | Init0; /* match the newline */
- }
- } /* while */
- ResidueSize = Maxline + num_read - lasti;
- if(ResidueSize > Maxline) {
- ResidueSize = Maxline; }
- strncpy(buffer+Maxline-ResidueSize, buffer+lasti, ResidueSize);
- lasti = Maxline - ResidueSize;
- } /* while */
- return;
- } /* end if(D==0) */
- while ((num_read = read(Text, buffer + Maxline, BlockSize)) > 0)
- {
- i=Maxline; end = Maxline+num_read;
- if((num_read < BlockSize) && buffer[end-1] != '\n') buffer[end] = '\n';
- if(FIRST_TIME) {
- buffer[i-1] = '\n';
- i--;
- FIRST_TIME = 0;
- }
- while (i < end)
- { c = buffer[i++];
- CMask = RMask[c];
- if (c != Newline)
- {
- r_even = B[0];
- r1 = Init1 & r_even;
- A[0] = (Next[r_even] & CMask) | r1;
- r_odd = B[1];
- r1 = Init1 & r_odd;
- r2 = (r_even | Next[r_even|A[0]]) &r_NO_ERR;
- A[1] = (Next[r_odd] & CMask) | r2 | r1 ;
- if(D == 1) goto Nextchar;
- r_even = B[2];
- r1 = Init1 & r_even;
- r2 = (r_odd | Next[r_odd|A[1]]) &r_NO_ERR;
- A[2] = (Next[r_even] & CMask) | r2 | r1 ;
- if(D == 2) goto Nextchar;
- r_odd = B[3];
- r1 = Init1 & r_odd;
- r2 = (r_even | Next[r_even|A[2]]) &r_NO_ERR;
- A[3] = (Next[r_odd] & CMask) | r2 | r1 ;
- if(D == 3) goto Nextchar;
- r_even = B[4];
- r1 = Init1 & r_even;
- r2 = (r_odd | Next[r_odd|A[3]]) &r_NO_ERR;
- A[4] = (Next[r_even] & CMask) | r2 | r1 ;
- goto Nextchar;
- } /* if NOT Newline */
- else { j++;
- r1 = Init1 & B[D]; /* match endofline */
- A[D] = (Next[B[D]] & CMask) | r1;
- if(TAIL) A[D] = Next[A[D]] | A[D];
- if((A[D] & 1) ^ INVERSE ) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i-1, end, j);
- }
- for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; }
- r1 = Init1 & B[0];
- A[0] = (Next[B[0]] & CMask) | r1;
- for(k=1; k<= D; k++) {
- r1 = Init1 & B[k];
- r2 = (B[k-1] | Next[A[k-1]|B[k-1]]) &r_NO_ERR;
- A[k] = (Next[B[k]] & CMask) | r1 | r2;
- }
- }
- Nextchar:
- c = buffer[i];
- CMask = RMask[c];
- if(c != Newline)
- {
- r1 = Init1 & A[0];
- B[0] = (Next[A[0]] & CMask) | r1;
- r1 = Init1 & A[1];
- B[1] = (Next[A[1]] & CMask) | ((A[0] | Next[A[0] | B[0]]) & r_NO_ERR) | r1 ;
- if(D == 1) goto Nextchar1;
- r1 = Init1 & A[2];
- B[2] = (Next[A[2]] & CMask) | ((A[1] | Next[A[1] | B[1]]) &r_NO_ERR) | r1 ;
- if(D == 2) goto Nextchar1;
- r1 = Init1 & A[3];
- B[3] = (Next[A[3]] & CMask) | ((A[2] | Next[A[2] | B[2]])&r_NO_ERR) | r1 ;
- if(D == 3) goto Nextchar1;
- r1 = Init1 & A[4];
- B[4] = (Next[A[4]] & CMask) | ((A[3] | Next[A[3] | B[3]])&r_NO_ERR) | r1 ;
- goto Nextchar1;
- } /* if(NOT Newline) */
- else { j++;
- r1 = Init1 & A[D]; /* match endofline */
- B[D] = (Next[A[D]] & CMask) | r1;
- if(TAIL) B[D] = Next[B[D]] | B[D];
- if((B[D] & 1) ^ INVERSE ) {
- if(FILENAMEONLY) {
- num_of_matched++;
- printf("%s\n", CurrentFileName);
- return;
- }
- r_output(buffer, i, end, j);
- }
- for(k=0; k<= D; k++) { A[k] = B[k] = Init[k]; }
- r1 = Init1 & A[0];
- B[0] = (Next[A[0]] & CMask) | r1;
- for(k=1; k<= D; k++) {
- r1 = Init1 & A[k];
- r2 = (A[k-1] | Next[A[k-1]|B[k-1]])&r_NO_ERR;
- B[k] = (Next[A[k]] & CMask) | r1 | r2;
- }
- }
- Nextchar1: i++;
- } /* while i < end */
- strncpy(buffer, buffer+num_read, Maxline);
- } /* while read() */
- return;
- } /* re */
-
-
- r_output (buffer, i, end, j)
- int i, end, j;
- CHAR *buffer;
- {
- int bp;
- if(i >= end) return;
- num_of_matched++;
- if(COUNT) return;
- if(FNAME) printf("%s: ", CurrentFileName);
- bp = i-1;
- while ((buffer[bp] != '\n') && (bp > 0)) bp--;
- if(LINENUM) printf("%d: ", j);
- if(buffer[bp] != '\n') bp = Maxline-1;
- bp++;
- while (bp <= i ) putchar(buffer[bp++]);
- }
-
- main(argc, argv)
- int argc; char *argv[];
- {
- int N, M, D=0, fp, fd, i, j;
- char c;
- int filetype;
- unsigned char Pattern[MAXPAT], OldPattern[MAXPAT], temp[MAXPAT];
-
- initial_value();
- strcpy(Progname, argv[0]);
- if (argc < 2) usage();
- Pattern[0] = '\0';
- while(--argc > 0 && (*++argv)[0] == '-') {
- c = *(argv[0]+1);
- switch(c) {
- case 'c' : COUNT = ON; /* output the # of matched */
- break;
- case 's' : SILENT = ON; /* silent mode */
- break;
- case 'p' : I = 0; /* insertion cost is 0 */
- break;
- case 'x' : WHOLELINE = ON; /* match the whole line */
- if(WORDBOUND) {
- fprintf(stderr, "%s: illegal option combination\n", Progname);
- exit(2);
- }
- break;
- case 'L' : break;
- case 'd' : DELIMITER = ON; /* user defines delimiter */
- if(argc <= 1) usage();
- if (argv[0][2] == '\0') {/* space after -d option */
- argv++;
- if ((D_length = strlen(argv[0])) > MaxDelimit) {
- fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
- exit(2);
- }
- D_pattern[0] = '<';
- strcpy(D_pattern+1, argv[0]);
- argc--;
- } else {
- if ((D_length = strlen(argv[0]+2)) > MaxDelimit) {
- fprintf(stderr, "%s: delimiter pattern too long\n", Progname);
- exit(2);
- }
- D_pattern[0] = '<';
- strcpy(D_pattern+1, argv[0]+2);
- } /* else */
- strcat(D_pattern, ">; ");
- D_length++; /* to count ';' as one */
- break;
- case 'e' : argc--;
- if(argc == 0) {
- fprintf(stderr, "%s: the pattern should immediately follow the -e option\n", Progname);
- usage();
- }
- if((++argv)[0][0] == '-') {
- Pattern[0] = '\\';
- strcat(Pattern, (argv)[0]);
- }
- else strcat(Pattern, argv[0]);
- break;
- case 'f' : PAT_FILE = ON;
- argv++;
- argc--;
- if((fp = open(argv[0], 0)) < 0) {
- fprintf(stderr, "%s: Can't open pattern file %s\n", Progname, argv[0]);
- exit(2);
- }
- break;
- case 'h' : NOFILENAME = ON;
- break;
- case 'i' : NOUPPER = ON;
- break;
- case 'k' : argc--;
- if(argc == 0) {
- fprintf(stderr, "%s: the pattern should immediately follow the -k option\n", Progname);
- usage();
- }
- CONSTANT = ON;
- argv++;
- strcat(Pattern, argv[0]);
- if(argc > 1 && argv[1][0] == '-') {
- fprintf(stderr, "%s: -k should be the last option in the command\n", Progname);
- exit(2);
- }
- break;
- case 'l' : FILENAMEONLY = ON;
- break;
- case 'n' : LINENUM = ON; /* output prefixed by line no*/
- break;
- case 'v' : INVERSE = ON; /* output no-matched lines */
- break;
- case 't' : OUTTAIL = ON; /* output from tail of delimiter */
- break;
- case 'B' : BESTMATCH = ON;
- break;
- case 'w' : WORDBOUND = ON;/* match to words */
- if(WHOLELINE) {
- fprintf(stderr, "%s: illegal option combination\n", Progname);
- exit(2);
- }
- break;
- case 'y' : NOPROMPT = ON;
- break;
- case 'I' : I = atoi(argv[0]+2); /* Insertion Cost */
- JUMP = ON;
- break;
- case 'S' : S = atoi(argv[0]+2); /* Substitution Cost */
- JUMP = ON;
- break;
- case 'D' : DD = atoi(argv[0]+2); /* Deletion Cost */
- JUMP = ON;
- break;
- case 'G' : FILEOUT = ON;
- COUNT = ON;
- break;
- default : if (isdigit(c)) {
- APPROX = ON;
- D = atoi(argv[0]+1);
- if (D > MaxError) {
- fprintf(stderr,"%s: the maximum number of errors is %d \n", Progname, MaxError);
- exit(2);
- }
- } else {
- fprintf(stderr, "%s: illegal option -%c\n",Progname, c);
- usage();
- }
- } /* switch(c) */
- } /* while (--argc > 0 && (*++argv)[0] == '-') */
-
- if (FILENAMEONLY && NOFILENAME) {
- fprintf(stderr, "%s: -h and -l options are mutually exclusive\n",Progname);
- }
- if (COUNT && (FILENAMEONLY || NOFILENAME)) {
- FILENAMEONLY = OFF;
- if(!FILEOUT) NOFILENAME = OFF;
- }
- if (!(PAT_FILE) && Pattern[0] == '\0') { /* Pattern not set with -e option */
- if (argc == 0) usage();
- strcpy(Pattern, *argv);
- argc--;
- argv++;
- }
- Numfiles = 0;
- fd = 3; /* make sure it's not 0 */
- if (argc == 0) /* check Pattern against stdin */
- fd = 0;
- else {
- if (!(Textfiles = (CHAR **)malloc(argc * sizeof(CHAR *) ))) {
- fprintf(stderr, "%s: malloc failure (you probably don't have enough memory)\n", Progname);
- exit(2);
- }
- while (argc--) { /* one or more filenames on command line -- put
- the valid filenames in a array of strings */
- /*
- if ((filetype = check_file(*argv)) != ISASCIIFILE) {
- if(filetype == NOSUCHFILE) fprintf(stderr,"%s: %s: no such file or directory\n",Progname,*argv);
- argv++;
- */
-
- if ((filetype = check_file(*argv)) == NOSUCHFILE) {
- if(filetype == NOSUCHFILE) fprintf(stderr,"%s: %s: no such file or directory\n",Progname,*argv);
- argv++;
- } else { /* file is ascii*/
- if (!(Textfiles[Numfiles] = (CHAR *)malloc((strlen(*argv)+1)))) {
- fprintf(stderr, "%s: malloc failure (you probably don't have enough memory)\n", Progname);
- exit(2);
- }
- strcpy(Textfiles[Numfiles++], *argv++);
- } /* else */
- } /* while (argc--) */
- } /* else */
- checksg(Pattern, D); /* check if the pattern is simple */
- strcpy(OldPattern, Pattern);
- if (SGREP == 0) {
- preprocess(D_pattern, Pattern);
- strcpy(old_D_pat, D_pattern);
- M = maskgen(Pattern, D);
- }
- else M = strlen(OldPattern);
- if (PAT_FILE) prepf(fp);
- if (Numfiles > 1) FNAME = ON;
- if (NOFILENAME) FNAME = 0;
- num_of_matched = 0;
- compat(); /* check compatibility between options */
- if (fd == 0) {
- if(FILENAMEONLY) {
- fprintf(stderr, "%s: -l option is not compatible with standard input\n", Progname);
- exit(2);
- }
- if(PAT_FILE) mgrep(fd);
- else {
- if(SGREP) sgrep(OldPattern, strlen(OldPattern), fd, D);
- else bitap(old_D_pat, Pattern, fd, M, D);
- }
- if (COUNT) {
- if(INVERSE && PAT_FILE) printf("%d\n", total_line-num_of_matched);
- else printf("%d\n", num_of_matched);
- }
- }
- else {
- for (i = 0; i < Numfiles; i++, close(fd), num_of_matched = 0) {
- strcpy(CurrentFileName, Textfiles[i]);
- if ((fd = open(Textfiles[i], 0)) <= 0) {
- fprintf(stderr, "%s: can't open file %s\n",Progname, Textfiles[i]);
- }
- else {
- if(PAT_FILE) mgrep(fd);
- else {
- if(SGREP) sgrep(OldPattern, strlen(OldPattern), fd, D);
- else bitap(old_D_pat, Pattern, fd, M, D);
- }
- if (num_of_matched) NOMATCH = OFF;
- if (COUNT && !FILEOUT) {
- if(INVERSE && PAT_FILE) {
- if(FNAME) printf("%s: %d\n", CurrentFileName, total_line - num_of_matched);
- else printf("%d\n", total_line - num_of_matched);
- }
- else {
- if(FNAME) printf("%s: %d\n", CurrentFileName, num_of_matched);
- else printf("%d\n", num_of_matched);
- }
- } /* if COUNT */
- if(FILEOUT && num_of_matched) {
- file_out(CurrentFileName);
- }
- } /* else */
- } /* for i < Numfiles */
- if(NOMATCH && BESTMATCH) {
- if(WORDBOUND || WHOLELINE || LINENUM || INVERSE) {
- SGREP = 0;
- preprocess(D_pattern, Pattern);
- strcpy(old_D_pat, D_pattern);
- M = maskgen(Pattern, D);
- }
- COUNT=ON; D=1;
- while(D<M && D<=MaxError && num_of_matched == 0) {
- for (i = 0; i < Numfiles; i++, close(fd)) {
- strcpy(CurrentFileName, Textfiles[i]);
- if ((fd = open(Textfiles[i], 0)) > 0) {
- if(PAT_FILE) mgrep(fd);
- else {
- if(SGREP) sgrep(OldPattern,strlen(OldPattern),fd,D);
- else bitap(old_D_pat,Pattern,fd,M,D);
- }
- }
- } /* for i < Numfiles */
- D++;
- } /* while */
- if(num_of_matched > 0) {
- D--; COUNT = 0;
- if(NOPROMPT) goto GO_AHEAD;
- if(D==1) fprintf(stderr, "best match has 1 error, ");
- else fprintf(stderr, "best match has %d errors, ", D);
- fflush(stderr);
- if(num_of_matched == 1) fprintf(stderr,"there is 1 match, output it? (y/n)");
- else fprintf(stderr,"there are %d matches, output them? (y/n)", num_of_matched);
- scanf("%c",&c);
- if(c != 'y') goto CONT;
- GO_AHEAD:
- for (i = 0; i < Numfiles; i++, close(fd)) {
- strcpy(CurrentFileName, Textfiles[i]);
- if ((fd = open(Textfiles[i], 0)) > 0) {
- if(PAT_FILE) mgrep(fd);
- else {
- if(SGREP) sgrep(OldPattern,strlen(OldPattern),fd,D);
- else bitap(old_D_pat,Pattern,fd,M,D);
- }
- }
- } /* for i < Numfiles */
- NOMATCH = 0;
- }
- }
- }
- CONT:
- if(EATFIRST) {
- printf("\n");
- EATFIRST = OFF;
- }
- if(num_of_matched) NOMATCH = OFF;
- if(NOMATCH) exit(1);
- exit(0);
- } /* end of main() */
-
-
- file_out(fname)
- char *fname;
- {
- int num_read;
- int fd;
- int i, len;
- CHAR buf[4097];
- if(FNAME) {
- len = strlen(fname);
- putchar('\n');
- for(i=0; i< len; i++) putchar(':');
- putchar('\n');
- printf("%s\n", CurrentFileName);
- len = strlen(fname);
- for(i=0; i< len; i++) putchar(':');
- putchar('\n');
- fflush(stdout);
- }
- fd = open(fname, 0);
- while((num_read = read(fd, buf, 4096)) > 0)
- write(1, buf, num_read);
- }
-
-
- usage()
- {
- fprintf(stderr, "usage: %s [-#cdehiklnpstvwxBDGIS] [-f patternfile] pattern [files]\n", Progname);
- printf("\n");
- fprintf(stderr, "summary of frequently used options:\n");
- fprintf(stderr, "-#: find matches with at most # errors\n");
- fprintf(stderr, "-c: output the number of matched records\n");
- fprintf(stderr, "-d: define record delimiter\n");
- fprintf(stderr, "-h: do not output file names\n");
- fprintf(stderr, "-i: case-insensitive search, e.g., 'a' = 'A'\n");
- fprintf(stderr, "-l: output the names of files that contain a match\n");
- fprintf(stderr, "-n: output record prefixed by record number\n");
- fprintf(stderr, "-v: output those records containing no matches\n");
- fprintf(stderr, "-w: pattern has to match as a word, e.g., 'win' will not match 'wind'\n");
- fprintf(stderr, "-B: best match mode. find the closest matches to the pattern\n");
- fprintf(stderr, "-G: output the files that contain a match\n");
- printf("\n");
-
- exit(2);
- }
-
- checksg(Pattern, D)
- CHAR *Pattern; int D;
- {
- char c;
- int i, m;
- m = strlen(Pattern);
- if(!(PAT_FILE) && m <= D) {
- fprintf(stderr, "%s: size of pattern must be greater than number of errors\n", Progname);
- exit(2);
- }
- SIMPLEPATTERN = ON;
- for (i=0; i < m; i++)
- {
- switch(Pattern[i])
- {
- case ';' : SIMPLEPATTERN = OFF; break;
- case ',' : SIMPLEPATTERN = OFF; break;
- case '.' : SIMPLEPATTERN = OFF; break;
- case '*' : SIMPLEPATTERN = OFF; break;
- case '-' : SIMPLEPATTERN = OFF; break;
- case '[' : SIMPLEPATTERN = OFF; break;
- case ']' : SIMPLEPATTERN = OFF; break;
- case '(' : SIMPLEPATTERN = OFF; break;
- case ')' : SIMPLEPATTERN = OFF; break;
- case '<' : SIMPLEPATTERN = OFF; break;
- case '>' : SIMPLEPATTERN = OFF; break;
- case '^' : if(D > 0) SIMPLEPATTERN = OFF;
- break;
- case '$' : if(D > 0) SIMPLEPATTERN = OFF;
- break;
- case '|' : SIMPLEPATTERN = OFF; break;
- case '#' : SIMPLEPATTERN = OFF; break;
- case '\\' : SIMPLEPATTERN = OFF; break;
- default : break;
- }
- }
- if (CONSTANT) SIMPLEPATTERN = ON;
- if (SIMPLEPATTERN == OFF) return;
- if (NOUPPER && D) return;
- if (JUMP == ON) return;
- if (I == 0) return;
- if (LINENUM) return;
- if (DELIMITER) return;
- if (INVERSE) return;
- if (WORDBOUND && D > 0) return;
- if (WHOLELINE && D > 0) return;
- if (SILENT) return; /* REMINDER: to be removed */
- SGREP = ON;
- if(m >= 16) DNA = ON;
- for(i=0; i<m; i++) {
- c = Pattern[i];
- if(c == 'a' || c == 'c' || c == 't' || c == 'g' ) ;
- else DNA = OFF;
- }
- return;
- }
-
- output (buffer, i1, i2, j)
- register CHAR *buffer; int i1, i2, j;
- {
- register CHAR *bp, *outend;
- if(i1 > i2) return;
- num_of_matched++;
- if(COUNT) return;
- if(SILENT) return;
- if(OUTTAIL) {
- i1 = i1 + D_length;
- i2 = i2 + D_length;
- }
- if(DELIMITER) j = j+1;
- if(FIRSTOUTPUT) {
- if (buffer[i1] == '\n') {
- i1++;
- EATFIRST = ON;
- }
- FIRSTOUTPUT = 0;
- }
- if(TRUNCATE) {
- fprintf(stderr, "WARNING!!! some lines have been truncated in output record #%d\n", num_of_matched-1);
- }
- while(buffer[i1] == '\n' && i1 <= i2) {
- printf("\n");
- i1++;
- }
- if(FNAME == ON) printf("%s: ", CurrentFileName);
- if(LINENUM) printf("%d: ", j-1);
- bp = buffer + i1;
- outend = buffer + i2;
- while(bp <= outend) putchar(*bp++);
- }
-
- /* end of main.c */
-